//For India Replication paper, based on authors' code
// Matching analysis on DLHS3


* ---------------------------------------------------------------------------
* Session setup: close any log left open by an earlier run, wipe memory,
* raise the memory / matrix-size limits, then load the DLHS3 births file and
* move into the "restricted" matching folder where the exports are written.
* ---------------------------------------------------------------------------
capture log close
clear all

set mem 2000m
set matsize 2000

* Folder that holds the births datasets (input) ...
local source "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Births Datasets"
* ... and the matching project folder (output)
local working "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Matching"

* The dataset is opened by absolute path, so changing directory first is safe
cd "`working'\restricted"
use "`source'\DLHS3 Births.dta"


// Size of the births file as loaded
count


// excluded Chandigarh from this analysis:
*drop if state=="Chandigarh" //43 obs deleted (or, if not restricted to births in last 12 months, then 184 obs deleted)


// Check that the observation identifier is unique
duplicates report obs
// 187338 observations reported, no duplicates

// keep if state=="Madhya Pradesh" | state=="Rajasthan" | state=="Orissa" | state=="Assam"


/* Make variables that don't yet exist.                                        */
/* Each indicator is 1 when the condition holds, 0 when every component is     */
/* observed and negative, and stays missing when that cannot be determined.    */
/* ab and nmsb are built exactly once, behind a capture drop, so this section  */
/* also runs when the births file already carries them (an earlier unguarded   */
/* copy of the same code used to stop with "variable already defined").        */

/* Skilled birth attendance (delivery in a facility, or outside of a facility but with a skilled birth attendant present) */
capture drop ab
gen ab = .
replace ab = 1 if ((ifb == 1)|(sba == 1))
replace ab = 0 if ((ifb == 0)&(sba == 0))

/* Alternate skilled birth attendance (delivery in a facility, or outside of a facility but with a skilled attendant present) with the other, more restricted */
/* definition of skilled birth attendant. */
gen alt_ab = .
replace alt_ab = 1 if ((alt_sba == 1)|(ifb == 1))
replace alt_ab = 0 if ((alt_sba == 0)&(ifb == 0))

/* Stillbirth/Neonatal death (infant is either still-born, or born alive but dies within first month of life) */
capture drop nmsb
gen nmsb = .
replace nmsb = 1 if ((sb == 1)|(nnm == 1))
replace nmsb = 0 if ((sb == 0)&(nnm == 0))

/* Late neonatal mortality: a neonatal death (nnm == 1) that is not flagged by enm */
gen lnm = . 
replace lnm = 1 if (nnm == 1)&(enm == 0)
replace lnm = 0 if (nnm == 0)|(enm == 1)


count /*59625 obs (or 187 154 if not restricted to births in last 12 months & deleted Chandigarh) */

// keeping only variables needed for matching

keep  state dlhs3_country_quintile bpl_card urban caste religion mateduc_cat nb_cat matage_cat jsy obs ab anc3 ifb pnm enm lnm nmsb cmcage

order state obs jsy 

// state is a string variable, and a missing string is "" rather than ".",
// so missing() is used here; comparing against "." would always report 0.
count if missing(state) //0 missing
 

// For every variable from jsy through lnm (in dataset order), report how many
// values are system-missing and how many already use 99 as a missing code.
foreach var of varlist jsy - lnm {

	di "missing = .?"
	count if `var'==.
	di "missing = 99?"
	count if `var'==99
	
	}
	
// no values are already coded as 99 for missing


// dropping observations for which JSY or the outcome is missing:

drop if jsy==.   //This only deletes 51 obs (or 122 in full dataset)
drop if anc3 == .  //nothing deleted (or 3 in full dataset)
drop if ifb == .  //174 obs deleted  (or 538 in full dataset)
drop if pnm == .   // 11 obs deleted (or 35 in full dataset)
drop if enm == .  //951 obs deleted  (or 1742 in full dataset)
drop if lnm == . //nothing deleted (NC)
*drop if nnm == . // (drops 1742 in full dataset)
drop if ab == .  // 589 obs deleted (or 2060 in full dataset)
*drop if nmsb ==.  //nothing deleted

// replacing missing covariate values as 99  add back in: dlhs3_country_quintile 
// (99 then acts as its own category for these covariates)
foreach var of varlist  bpl_card urban caste religion mateduc_cat nb_cat matage_cat {

	replace `var' = 99 if `var' == .
	
	}
	
	
// checking for missing values 
foreach var of varlist jsy-lnm {
	count if `var'==.
	}
	
// now there are no missing values in the entire dataset
// NOTE(review): dlhs3_country_quintile is not in the 99-recode list above, so
// this statement relies on that variable having no missing values - confirm
// from the counts printed by the loop.

// Coarsened covariates for matching.
// Each detailed variable is collapsed into a smaller set of groups and then
// cross-tabulated against its source as a check. Values not named in a rule
// (including the 99 missing code where it is not listed) are carried over
// unchanged by recode. The source variables are removed together at the end.

// wealth: quintiles 1-2 / 3 / 4-5
recode dlhs3_country_quintile (1 2 = 1) (3 = 2) (4 5 = 3), generate(wealthgroup)
tabulate dlhs3_country_quintile wealthgroup

// caste: categories 1-2 / 3-4 / missing
recode caste (1 2 = 1) (3 4 = 2) (99 = 99), generate(castegroup)
tabulate caste castegroup

// maternal education: categories 0-1 / 2-3 / missing
recode mateduc_cat (0 1 = 1) (2 3 = 2) (99 = 99), generate(educgroup)
tabulate mateduc_cat educgroup

// maternal age: categories 1-2 / 3-6
recode matage_cat (1 2 = 1) (3 4 5 6 = 2), generate(agegroup)
tabulate matage_cat agegroup

// number of births: 1 / 2 / 3-4 / missing
recode nb_cat (1 = 1) (2 = 2) (3 4 = 3) (99 = 99), generate(nbgroup)
tabulate nb_cat nbgroup

// the detailed versions (and religion, which is not matched on) are no longer needed
drop dlhs3_country_quintile caste mateduc_cat matage_cat nb_cat religion

count //57 850 obs or 182 654 in full dataset (or 182 834 in full dataset without removing any state) --> MUCH CLOSER TO PRE-MATCHING 182 869

// Collect the distinct state names, then print each name followed by the
// number of observations it contributes.
levelsof state, local(state)

foreach st of local state {
	display "`st'"
	count if state == "`st'"
}

/*
Variant for a numeric state code (no quotes around the value):
foreach s of local state {
	di `s'
	count if state==`s'
	}	
*/



// Export the matching input files, one per recall window.
//
// The 6-month export runs inside preserve/restore. Without that, the 6-month
// step would drop cmcage and keep only births with cmcage <= 6, so the
// 12-month step below would stop with "variable cmcage not found" (and could
// never contain births with cmcage between 7 and 12 anyway).

// States excluded before exporting
drop if state=="Chandigarh"
drop if state == "Andaman & Nicobar Islands"
drop if state == "Lakshadweep" /*(have to just run this state separately??) */
// NOTE(review): these exclusions were written for the 6-month restriction and,
// as in the original statement order, they also apply to the 12-month export
// below. The counts quoted in the comments come from separate interactive
// runs - confirm which exclusions are intended for each window.

//For matching while restricting to 6 months 
preserve
keep if  cmcage <= 6
count  /*32035*/
drop cmcage
outsheet using dlhs3_for_matching_RESTRICTED_6_months.csv, comma replace nolabel
restore


//For the restricted data, must drop several states
//8 months, do the following states separately: 
//drop if state == "Lakshadweep"


//For matching while restricting to 12 months 
// Not wrapped in preserve/restore: the 12-month sample is left in memory so
// the later levelsof state lists the states present in this export.
keep if  cmcage <= 12
count  /*57 890*/
drop cmcage
outsheet using dlhs3_for_matching_RESTRICTED_12_months.csv, comma replace nolabel

// NOTE(review): the embedded R script reads dlhs3_for_matching_RESTRICTED.csv;
// point it at whichever of the two files written here is intended.

//outsheet using dlhs3_for_matching_RESTRICTED.csv, comma replace nolabel
//outsheet using dlhs3_for_matching_NOTRESTRICTED.csv, comma replace nolabel


** this is where the R code starts:
/*
#Matching Analysis for JSY Evaluation
#This code runs the MatchIt packages to produce a dataset matched on the variables we specified.

	# Part 1: Load Libraries
	library(MASS)
	library(Zelig)
	library(MatchIt)
	library(survey)
		
	# this is just old code, probably doesn't get used at all
	conf.B<-matrix(NA, nrow=34, ncol=4)		
	conf.A<-matrix(NA, nrow=34, ncol=4)		
	hg.meanB<-matrix(NA, nrow=34, ncol=2)
	hg.meanA<-matrix(NA, nrow=34, ncol=2)
	sample.all<-matrix(NA, nrow=34, ncol=2)
	sample.match<-matrix(NA, nrow=34, ncol=2)
	
	
	my.data<-read.table("dlhs3_for_matching_RESTRICTED.csv", header = T, sep=",") 
	countries.all<-sort(as.character(unique(my.data$state)))
	## The loop below goes through each state found in the state variable and, within that state, runs exact matching of JSY recipients to non-recipients on the covariates listed in the matchit() formula inside the loop
	i <- 1
	for (x in countries.all) {
		x <-countries.all[i]
		print(x)
		z<-my.data[my.data$state==x,]	

		matchit.out <- matchit(jsy ~ bpl_card + urban + as.factor(wealthgroup) + as.factor(castegroup) + as.factor(educgroup) + as.factor(nbgroup) + as.factor(agegroup), data=z, method = "exact")

		m.data<-match.data(matchit.out, weights="weights")		

		
		write.csv(m.data, file=paste("J:/Project/Coverage/In-Facility Birth Project/Matching/mdata_",x,".csv", sep=""), sep=",", col.names=T, row.names=F, quote=F)
		i <- i+1
	}

	
** end R code

*/


// Gather the per-state matched files written by the R script into one
// Stata dataset.
cd "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Matching"

// State names are taken from the data currently in memory
levelsof state, local(state)

// Step 1: convert each per-state CSV into a .dta file named after the state.
// careful here: original (restricted) is mdata, restricted (12 months) is m12modata
foreach st of local state {
	insheet using "m12modata_`st'.csv", clear
	save "`st'.dta", replace
}

// Step 2: start from an empty dataset and stack the per-state files
clear
foreach st of local state {
	append using "`st'.dta"
}

save matched_12months_dlhs3.dta, replace

count /*NC: 47038 obs (Restricted to 12 months, no Chandigarh) or 163 906 if didn't restrict to 12 months; or 33 029 if restricted to 8 months; 24 331 if restricted to 6 months*/

clear

// Reload the full births file for the regression stage.
// Alternative locations used on other machines:
//cd "J:\Project\Evaluation\In-Facility Birth Project\Births Datasets"
//cd "\Volumes\FreeAgent GoFlex Drive\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Births Datasets"
cd "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Data\Births Datasets"

// using the DLHS3 births dataset produced by Joe and Steve:
use "DLHS3 Births.dta", clear

// Back to the matching folder, where the results files are written.
// Alternative locations used on other machines:
//cd "J:\Project\Evaluation\In-Facility Birth Project\Matching"
//cd "\Volumes\FreeAgent GoFlex Drive\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Matching"
cd "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Matching"

label variable jsy "JSY"

// Weights passed to margins later on: dweight scaled by dm_coef (used in the
// national models) and by sm_coef (used in the state models)
generate dwu = dweight*dm_coef
generate swu = dweight*sm_coef

// BPL card: turn the 2 code into 0 so the variable reads 1 = yes / 0 = no,
// then attach a fresh value label
recode bpl_card (2 = 0)
capture label drop bpl_card
label define bpl_card 0 "no" 1 "yes" 8 "DK"
label values bpl_card bpl_card


//NC do counts below
count  /*187338*/
count if (dlhs == 3) & (cmcage <= 12)  /*59668*/

// Rebuild the derived outcome indicators on the reloaded births file.
// Each indicator is created as 1 where its positive condition holds (missing
// elsewhere) and then set to 0 where the negative condition holds, so rows
// satisfying neither condition stay missing.
quietly {
	// keep if  (((dlhs == 3)&(cmcage <= 12)))

	/* Skilled birth attendance: facility delivery, or a skilled attendant present */
	capture drop ab
	generate ab = 1 if (ifb == 1) | (sba == 1)
	replace  ab = 0 if (ifb == 0) & (sba == 0)

	/* Same indicator using the alternate, more restricted definition of skilled attendant */
	generate alt_ab = 1 if (alt_sba == 1) | (ifb == 1)
	replace  alt_ab = 0 if (alt_sba == 0) & (ifb == 0)

	/* Stillbirth or neonatal death */
	capture drop nmsb
	generate nmsb = 1 if (sb == 1) | (nnm == 1)
	replace  nmsb = 0 if (sb == 0) & (nnm == 0)

	/* Late neonatal mortality: neonatal death not flagged by enm */
	generate lnm = 1 if (nnm == 1) & (enm == 0)
	replace  lnm = 0 if (nnm == 0) | (enm == 1)
}


// Attach the matching output to the births file and keep matched births only.

//merge 1:1 obs jsy using "J:\Project\Evaluation\In-Facility Birth Project\Matching\matched_dlhs3.dta" 
//destring state, replace

// The births file's own state variable is moved aside before merging;
// presumably so that state afterwards is the one supplied by the matched
// file - confirm against the contents of matched_dlhs3.dta.
rename state statestring
merge 1:1 obs jsy using "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Matching\matched_dlhs3.dta" 

// observations before ...
count 

// ... and after discarding everything that is not present in both files
keep if _merge == 3
drop _merge

count 

set more off

// weights comes in from the matched file; give it a descriptive name
rename weights matching_weights

*to run high focus states: if state_cat == 1 

// Reference (omitted) categories used by xi when it expands the i. terms.
// The state reference used to be the numeric code 9 (char state[omit] 9).
char matage_cat[omit] 4
char caste[omit] 4
char birth_interval[omit] 3
char yearbirth[omit] 2008
char state[omit] "Uttar Pradesh"

// Right-hand-side covariates, ending with state fixed effects
local covar_list "i.matage_cat i.nb_cat  i.birth_interval i.mult_birth i.mateduc_cat i.dlhs3_country_decile i.caste i.religion i.res_cat DIST_mean_hh_p_income3 i.state"

// Outcomes modelled in this section
local outcomes anc3 ifb ab pnm nnm

display "running logistic regression of outcomes dependent on JSY and other covariates - maternal age, number of births, birth interval, multiple births, maternal education, wealth deciles, caste, religion, residence category, district mean PI, and state fixed effects"

display "NATIONAL: conducting MV logistic regression on NATIONAL matched dataset using the matching weights"

set more off

// One weighted logit per outcome on observations with state_cat != 1,
// followed by predictions at covariate means with jsy set to 1 and then to 0.
// The fitted model is stored under the outcome's name for the table below.
foreach outcome of varlist `outcomes' {

	display "running MV logit model for `outcome' for NATIONAL LEVEL:"

	xi: logit `outcome' jsy  `covar_list' if state_cat != 1 [pweight=matching_weights], or

	margins [pweight=dwu], at(jsy=1) atmeans
	margins [pweight=dwu], at(jsy=0) atmeans

	estimates store `outcome'
}

// Tab-delimited table of exponentiated coefficients, SEs, p-values and N.
// Earlier alternative:
// estimates table anc3 ifb ab pnm  nnm, b(%4.3f)  se(%4.3f)  p(%4.3f) stats(N) title("`outcome' - relative risk national ")
estout `outcomes' using "Matching Outcomes - 25 April 2011 ORs_Non-High Focus dataset.txt", style(tab) cells("b se p") stats(N) eform replace


display "end of NATIONAL analysis"



// State-level models for the high focus states, each written to the log.
// Any log that is still open is closed first, and this section closes its own
// log when it finishes; Stata refuses a second log using while one is open.
capture log close
log using "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Log files\Final State level regression results Unrestricted.smcl", replace


*Run high focus states individually: (state_cat == 1 ) 

// Reference (omitted) categories used by xi; set once, before any preserve,
// so they are still in place after every restore.
char matage_cat[omit] 4
char caste[omit] 4
char birth_interval[omit] 3

// Same covariates as the national model, but with district fixed effects in
// place of state fixed effects
local covar_list = "i.matage_cat i.nb_cat  i.birth_interval i.mult_birth i.mateduc_cat i.dlhs3_country_decile i.caste i.religion i.res_cat DIST_mean_hh_p_income3 i.district_wncode_dlhs3"

set more off

foreach statename in "Madhya Pradesh" "Orissa" "Rajasthan" "Uttarakhand" "Chhattisgarh" "Uttar Pradesh" "Jharkhand" "Jammu & Kashmir" {

	// Work on one state at a time; the full dataset comes back at restore
	preserve
	keep if state == "`statename'" 

	disp "running logistic regression of outcomes dependent on JSY and other covariates - maternal age, number of births, birth interval, multiple births, maternal education, wealth deciles, caste, religion, residence category, district mean PI, and district fixed effects"

	// NOTE(review): the message below is labelled NATIONAL although this is
	// the state-level analysis; text left unchanged so existing logs still match.
	di "NATIONAL: conducting MV logistic regression on STATE matched dataset using the matching weights on Unrestricted data"

	foreach outcome of varlist  anc3 ifb ab pnm  nnm  {

		di "running MV logit model for `outcome' for `statename':"

		// No if-qualifier is needed: only this state's rows remain after
		// the keep above
		xi: logit `outcome' jsy  `covar_list' [pweight=matching_weights], or

		// Predictions at covariate means with jsy set to 1 and then to 0
		margins [pweight=swu], at(jsy=1) atmeans
		margins [pweight=swu], at(jsy=0) atmeans

		// Stored under the outcome's name; overwritten for each state
		estimates store `outcome'
	}

	// One tab-delimited results file per state
	estout anc3 ifb ab pnm  nnm   using "Matching Outcomes - 27 April 2011 ORs_`statename' Unrestricted dataset.txt", style(tab) cells("b se p") stats(N) eform replace

	restore
}

di "end of STATE-LEVEL (for HIGH FOCUS STATES) analysis"

log close



// State-level models for the states that are NOT high focus.
// A log may still be open from the preceding section; log using stops with
// an error in that case, so any open log is closed first.
capture log close
log using "E:\Natalie\PhD Health Policy\Classes\Gov 2001\Replication paper\Project\Log files\Final State level regression results Unrestricted non high focus.smcl", replace

*Run non-high-focus states individually: (state_cat != 1 ) 
//Errors (conversion or other): "Andaman & Nicobar Islands"  "Arunachal Pradesh" "Chandigarh"  "Dadra & Nagar Haveli" "Daman & Diu"   "Goa"  "Himachal Pradesh" "Kerala" 
// "Manipur"  "Mizoram"

// Reference (omitted) categories used by xi; set once, before any preserve,
// so they are still in place after every restore.
char matage_cat[omit] 4
char caste[omit] 4
char birth_interval[omit] 3
//char yearbirth[omit] 2008  /*Why was this in here?? Have removed it*/

// Covariates with district fixed effects
local covar_list = "i.matage_cat i.nb_cat  i.birth_interval i.mult_birth i.mateduc_cat i.dlhs3_country_decile i.caste i.religion i.res_cat DIST_mean_hh_p_income3 i.district_wncode_dlhs3"

set more off

foreach statename in  "Andhra Pradesh"    "Delhi" "Gujarat"  "Haryana"  "Karnataka" "Lakshadweep"  "Maharashtra"   "Meghalaya"  "Punjab" "Sikkim" "Tamil Nadu" "Tripura"  "West Bengal"   {

	// Work on one state at a time; the full dataset comes back at restore
	preserve
	keep if state == "`statename'" 

	disp "running logistic regression of outcomes dependent on JSY and other covariates - maternal age, number of births, birth interval, multiple births, maternal education, wealth deciles, caste, religion, residence category, district mean PI, and district fixed effects"

	// NOTE(review): the message below is labelled NATIONAL although this is
	// the state-level analysis; text left unchanged so existing logs still match.
	di "NATIONAL: conducting MV logistic regression on STATE matched dataset using the matching weights on Unrestricted data"

	// Only the three care-seeking outcomes are modelled for these states
	foreach outcome of varlist  anc3 ifb ab   {

		di "running MV logit model for `outcome' for `statename':"

		// No if-qualifier is needed: only this state's rows remain after
		// the keep above
		xi: logit `outcome' jsy  `covar_list' [pweight=matching_weights], or

		// Predictions at covariate means with jsy set to 1 and then to 0
		margins [pweight=swu], at(jsy=1) atmeans
		margins [pweight=swu], at(jsy=0) atmeans

		// Results are read from the log; estimates are not stored here
		*estimates store `outcome'
	}

	// Table export is disabled for these states:
	// estimates table anc3 ifb ab , b(%4.3f)  se(%4.3f)  p(%4.3f) stats(N) title("`outcome' - relative risk national ")
	*estout anc3 ifb ab pnm  nnm   using "Matching Outcomes - 27 April 2011 ORs_`statename' 12 months restricted dataset.txt", style(tab) cells("b se p") stats(N) eform replace
	*estout anc3 ifb ab pnm  nnm   using "Matching Outcomes - 27 April 2011 ORs_`statename' Unrestricted dataset.txt", style(tab) cells("b se p") stats(N) eform replace

	restore
}

di "end of STATE-LEVEL (for NOT HIGH FOCUS STATES) analysis"

log close
